##Setup: install the required packages and load the libraries. If you have not installed the tidyverse package yet, uncomment the install lines below.

#install.packages("tidyverse") 
#install.packages("lubridate")
library(tidyverse)
library(lubridate)

# Download the Wichita records (one row per record) straight from the
# course site and parse them into a tibble.
wichita <- read_csv(
  "https://datajournalism.tech/wp-content/uploads/2019/10/wichita.csv"
)

# Head count for each race category; used further down as the denominator
# when the per-race rate is computed.
population <- tribble(
  ~subject_race,            ~num_people,
  "asian/pacific islander", 19272,
  "black",                  42679,
  "hispanic",               63659,
  "other/unknown",          13351,
  "white",                  246343
)

# Latitude / longitude the interactive map is centred on.
center_lat <- 37.692963
center_lng <- -97.323992

##Data Analysis

Explore the dataset provided by Stanford University. See more on their website https://openpolicing.stanford.edu.

# Open the spreadsheet-style viewer only in an interactive session: View()
# needs a GUI and can fail when the script is run with Rscript or knitted.
if (interactive()) {
  View(wichita) # to view the data table
}
# Print each column's name, type and first few values.
str(wichita)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 57750 obs. of  22 variables:
##  $ X1                     : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ raw_row_number         : chr  "923578" "923657" "912091" "923680" ...
##  $ date                   : Date, format: "2016-01-01" "2016-01-01" ...
##  $ time                   : 'hms' num  18:00:00 18:08:00 18:11:00 18:13:00 ...
##   ..- attr(*, "units")= chr "secs"
##  $ location               : chr  "N WEST ST, KS, 67205" "8000 W 13TH ST N, WICHITA, KS, 67212" "500 S LIMUEL ST, WICHITA, KS, 67235" "7600 W 21ST ST N, WICHITA, KS, 67205" ...
##  $ lat                    : num  37.7 37.7 37.7 37.7 37.7 ...
##  $ lng                    : num  -97.4 -97.4 -97.5 -97.4 -97.4 ...
##  $ subject_age            : num  16 44 20 21 28 27 15 20 23 NA ...
##  $ subject_race           : chr  "white" "white" "white" "hispanic" ...
##  $ subject_sex            : chr  "female" "male" "male" "female" ...
##  $ type                   : chr  "vehicular" "vehicular" "vehicular" "vehicular" ...
##  $ disposition            : chr  "DISMISSED" "GUILTY (IVR)" "DISMISSED WITH PREJUDICE; DISMISSED WITH PREJUDICE" "GUILTY" ...
##  $ violation              : chr  "RUN STOP SIGN" "SPEED OVER LIMIT" "DUI; INATTENTIVE DRIVING" "SPEED OVER LIMIT" ...
##  $ citation_issued        : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ outcome                : chr  "citation" "citation" "citation" "citation" ...
##  $ posted_speed           : num  NA 40 NA 40 40 40 NA NA NA NA ...
##  $ vehicle_color          : chr  "BURGUNDY OR MAROON" "\"ALUMINUM, SILVER\"" "WHITE" "\"ALUMINUM, SILVER\"" ...
##  $ vehicle_make           : chr  "JEEP (1989 TO PRESENT)" "HYUNDAI" "HONDA" "TOYOTA" ...
##  $ vehicle_model          : chr  NA "TUCSON" NA NA ...
##  $ vehicle_year           : num  2008 NA NA NA NA ...
##  $ raw_defendant_race     : chr  "W" "W" "W" "W" ...
##  $ raw_defendant_ethnicity: chr  "N" "N" "N" "H" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   X1 = col_double(),
##   ..   raw_row_number = col_character(),
##   ..   date = col_date(format = ""),
##   ..   time = col_time(format = ""),
##   ..   location = col_character(),
##   ..   lat = col_double(),
##   ..   lng = col_double(),
##   ..   subject_age = col_double(),
##   ..   subject_race = col_character(),
##   ..   subject_sex = col_character(),
##   ..   type = col_character(),
##   ..   disposition = col_character(),
##   ..   violation = col_character(),
##   ..   citation_issued = col_logical(),
##   ..   outcome = col_character(),
##   ..   posted_speed = col_double(),
##   ..   vehicle_color = col_character(),
##   ..   vehicle_make = col_character(),
##   ..   vehicle_model = col_character(),
##   ..   vehicle_year = col_double(),
##   ..   raw_defendant_race = col_character(),
##   ..   raw_defendant_ethnicity = col_character()
##   .. )
# Compact overview of the table: the row and column counts, then one line per
# column showing its type and its first few values.
glimpse(wichita) #to see a short summary of values in each column
## Observations: 57,750
## Variables: 22
## $ X1                      <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,...
## $ raw_row_number          <chr> "923578", "923657", "912091", "923680"...
## $ date                    <date> 2016-01-01, 2016-01-01, 2016-01-01, 2...
## $ time                    <time> 18:00:00, 18:08:00, 18:11:00, 18:13:0...
## $ location                <chr> "N WEST ST, KS, 67205", "8000 W 13TH S...
## $ lat                     <dbl> 37.74143, 37.70880, 37.67482, 37.72402...
## $ lng                     <dbl> -97.38976, -97.44059, -97.48999, -97.4...
## $ subject_age             <dbl> 16, 44, 20, 21, 28, 27, 15, 20, 23, NA...
## $ subject_race            <chr> "white", "white", "white", "hispanic",...
## $ subject_sex             <chr> "female", "male", "male", "female", "m...
## $ type                    <chr> "vehicular", "vehicular", "vehicular",...
## $ disposition             <chr> "DISMISSED", "GUILTY (IVR)", "DISMISSE...
## $ violation               <chr> "RUN STOP SIGN", "SPEED OVER LIMIT", "...
## $ citation_issued         <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR...
## $ outcome                 <chr> "citation", "citation", "citation", "c...
## $ posted_speed            <dbl> NA, 40, NA, 40, 40, 40, NA, NA, NA, NA...
## $ vehicle_color           <chr> "BURGUNDY OR MAROON", "\"ALUMINUM, SIL...
## $ vehicle_make            <chr> "JEEP (1989 TO PRESENT)", "HYUNDAI", "...
## $ vehicle_model           <chr> NA, "TUCSON", NA, NA, "SILVERADO", "NE...
## $ vehicle_year            <dbl> 2008, NA, NA, NA, NA, NA, NA, 2008, 20...
## $ raw_defendant_race      <chr> "W", "W", "W", "W", "W", "W", "W", "W"...
## $ raw_defendant_ethnicity <chr> "N", "N", "N", "H", "H", "N", "H", "H"...
# List the column names only, as a character vector.
colnames(wichita) #to view column headers
##  [1] "X1"                      "raw_row_number"         
##  [3] "date"                    "time"                   
##  [5] "location"                "lat"                    
##  [7] "lng"                     "subject_age"            
##  [9] "subject_race"            "subject_sex"            
## [11] "type"                    "disposition"            
## [13] "violation"               "citation_issued"        
## [15] "outcome"                 "posted_speed"           
## [17] "vehicle_color"           "vehicle_make"           
## [19] "vehicle_model"           "vehicle_year"           
## [21] "raw_defendant_race"      "raw_defendant_ethnicity"

After viewing the dataset, you can analyze it to see the min, max, mean, median and other values for each variable. These are called descriptive statistics.

# Descriptive statistics per column: min, quartiles, median, mean, max and the
# number of NAs for numeric and date columns; length, class and mode for
# character columns.
summary(wichita)
##        X1        raw_row_number          date                time         
##  Min.   :    1   Length:57750       Min.   :2016-01-01   Length:57750     
##  1st Qu.:14438   Class :character   1st Qu.:2016-03-16   Class1:hms       
##  Median :28876   Mode  :character   Median :2016-05-29   Class2:difftime  
##  Mean   :28876                      Mean   :2016-06-10   Mode  :numeric   
##  3rd Qu.:43313                      3rd Qu.:2016-08-31                    
##  Max.   :57750                      Max.   :2016-12-31                    
##                                                                           
##    location              lat             lng           subject_age   
##  Length:57750       Min.   :37.47   Min.   :-101.36   Min.   :11.00  
##  Class :character   1st Qu.:37.67   1st Qu.: -97.37   1st Qu.:24.00  
##  Mode  :character   Median :37.69   Median : -97.34   Median :33.00  
##                     Mean   :37.69   Mean   : -97.33   Mean   :36.71  
##                     3rd Qu.:37.70   3rd Qu.: -97.28   3rd Qu.:48.00  
##                     Max.   :38.48   Max.   : -96.75   Max.   :99.00  
##                     NA's   :1167    NA's   :1167      NA's   :10128  
##  subject_race       subject_sex            type          
##  Length:57750       Length:57750       Length:57750      
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##  disposition         violation         citation_issued   outcome         
##  Length:57750       Length:57750       Mode:logical    Length:57750      
##  Class :character   Class :character   TRUE:57750      Class :character  
##  Mode  :character   Mode  :character                   Mode  :character  
##                                                                          
##                                                                          
##                                                                          
##                                                                          
##   posted_speed    vehicle_color      vehicle_make       vehicle_model     
##  Min.   : 20.00   Length:57750       Length:57750       Length:57750      
##  1st Qu.: 30.00   Class :character   Class :character   Class :character  
##  Median : 40.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 39.93                                                           
##  3rd Qu.: 40.00                                                           
##  Max.   :304.00                                                           
##  NA's   :35149                                                            
##   vehicle_year   raw_defendant_race raw_defendant_ethnicity
##  Min.   :1962    Length:57750       Length:57750           
##  1st Qu.:2001    Class :character   Class :character       
##  Median :2005    Mode  :character   Mode  :character       
##  Mean   :2005                                              
##  3rd Qu.:2009                                              
##  Max.   :2999                                              
##  NA's   :43236

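The group_by verb splits the rows into groups that share the same value of a column (here, the driver's race). The summarize verb is normally used together with group_by: it collapses each group into a single row, which lets you count the number of rows in each group. Joining those counts to the population table and dividing by the number of people then gives the share of each group relative to its population.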

# Number of rows in the dataset for each race category.
race <- wichita %>%
  group_by(subject_race) %>%
  summarize(value = n())

# Attach the counts to the population table and divide to get the number of
# recorded rows per resident for each race category.
stop_rate <- population %>%
  left_join(race, by = "subject_race") %>%
  mutate(stop_rate = value / num_people)

##Data Visualization

###Bar Chart

#make the plot

# Raise the penalty for scientific notation so that numbers are printed in
# fixed notation. Set before the chart is built so the intent is clear.
options(scipen = 10000)

# Horizontal bar chart of the per-resident rate for each race category,
# sorted from lowest to highest rate.
bar <- ggplot(
  stop_rate,
  aes(x = reorder(subject_race, stop_rate), y = stop_rate)
) +
  # geom_col() draws bars whose height is the y value itself
  # (same as geom_bar(stat = "identity")).
  geom_col(position = "identity", fill = "red") +
  geom_hline(yintercept = 0) +
  labs(
    title = "Drivers stopped by police in Wichita Kansas (2016)",
    subtitle = "African American drivers got stopped more than white drivers",
    # Explicit axis titles: without them the category axis is labelled with the
    # raw expression "reorder(subject_race, stop_rate)".
    x = "Driver race",
    y = "Stops per resident"
  ) +
  # Flip the axes so the race labels read horizontally.
  coord_flip()

# Display the chart.
bar

You can export a graphic by running the name of its object in the Console and then clicking the Export button under the Plots tab. Alternatively, you can use one of the following commands to export it:

# Pass the plot explicitly. Without `plot =`, ggsave() writes whichever plot
# was displayed last, which is not necessarily the bar chart.
ggsave("bar.png", plot = bar, width = 40, height = 20, units = "cm")
# NOTE(review): writing .svg files may require the svglite package to be
# installed -- confirm on your machine.
ggsave("bar.svg", plot = bar, width = 40, height = 20, units = "cm")
# To export another chart, pass its object and a new file name in the same way,
# e.g. ggsave("line.png", plot = line) for a plot stored in `line`.

###Interactive Map with leaflet

#install.packages("httpuv")
#install.packages("leaflet")
library(httpuv)
library(leaflet)
# Colour palette that maps each subject_race category to a marker colour.
# It is stored as `race_palette` (not `race`) so that it does not overwrite the
# `race` summary table that the bar-chart section builds and joins on.
race_palette <- colorFactor(
  palette = c("white", "black", "gold", "darkorange", "gray53"),
  domain = c(
    "white", "black", "asian/pacific islander", "hispanic", "other/unknown"
  ),
  ordered = TRUE
)

# Rows without coordinates cannot be drawn. Drop them explicitly instead of
# letting leaflet discard them with a "missing or invalid lat/lon" warning.
wichita_located <- wichita %>%
  filter(!is.na(lat), !is.na(lng))

# One small filled circle per record, coloured by race, on an OpenStreetMap
# base layer centred on center_lat / center_lng.
map <- leaflet(wichita_located) %>%
  addProviderTiles(providers$OpenStreetMap) %>%
  setView(lng = center_lng, lat = center_lat, zoom = 10) %>%
  addCircleMarkers(
    lng = ~lng,
    lat = ~lat,
    # Formula form: the popup text is built from the same rows that are drawn,
    # so it stays aligned with the markers after the filter above.
    popup = ~paste("This is a", subject_race, "and", subject_sex, "driver."),
    weight = 2,
    radius = 2,
    color = ~race_palette(subject_race),
    stroke = FALSE,
    fillOpacity = 1
  )

# Display the map.
map